#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Preparing data: read the CSV (path is relative to the current working
# directory). Later code in this file uses its 'dataset', 'x' and 'y' columns.
data = pd.read_csv('anscombe.csv')
# Summary statistics of the 'dataset' column. The return value is neither
# stored nor printed here, so it is only visible in an interactive session.
data.loc[:, 'dataset'].describe()

'''training the model'''
def slr(X_train, y_train):
    """Fit a one-variable linear regression and summarise the sample.

    Parameters
    ----------
    X_train : 1-D array-like (e.g. a pandas Series)
        Predictor values.
    y_train : 1-D array-like (e.g. a pandas Series)
        Response values, one per entry of ``X_train``.

    Returns
    -------
    xx : numpy.ndarray of shape (101, 1)
        101 evenly spaced x values spanning ``min(X_train)``..``max(X_train)``.
    yy : numpy.ndarray of shape (101, 1)
        The fitted line ``slope * xx + intercept`` evaluated on ``xx``.
    info : str
        Multi-line text with the means, sample variances (``ddof=1``),
        Pearson correlation and the fitted regression equation, each
        formatted to four decimal places.
    """
    # Work on plain 1-D float arrays. The statistics below are then true
    # scalars; calling float() on a one-element pandas Series (what a
    # one-column DataFrame's .mean()/.var() returns) is deprecated in pandas.
    x = np.asarray(X_train, dtype=float).ravel()
    y = np.asarray(y_train, dtype=float).ravel()

    cor = np.corrcoef(x, y)[0, 1]

    # scikit-learn expects a 2-D feature matrix: one column, one row per sample.
    clf = LinearRegression()
    clf.fit(x.reshape(-1, 1), y)
    slope = float(clf.coef_[0])
    intercept = float(clf.intercept_)

    # Column-shaped grid, kept as (101, 1) so the returned shapes stay the
    # same as before for existing callers.
    xx = np.linspace(x.min(), x.max(), 101).reshape(-1, 1)
    yy = slope * xx + intercept

    # ddof=1 gives the sample variance, matching the label in the text.
    info = (
        f"Mean of x:{x.mean():.4f}\n"
        f"Mean of y:{y.mean():.4f}\n"
        f"Sample variance of x:{x.var(ddof=1):.4f}\n"
        f"Sample variance of y:{y.var(ddof=1):.4f}\n"
        f"Correlation between x and y:{cor:.4f}\n"
        f"Regressor:y = {slope:.4f}x + {intercept:.4f}"
    )
    return xx, yy, info

# Visualization: a 2x2 grid with one panel per group of the 'dataset' column.
# Each panel shows the raw points, the fitted line from slr() in red, and a
# text box with the summary statistics.
fig, sub = plt.subplots(nrows=2, ncols=2, figsize=(16, 16))
fig.subplots_adjust(wspace=0.2, hspace=0.2)
titles = ('dataset I', 'dataset II', 'dataset III', 'dataset IV')
groups = data.groupby('dataset')
# Groups, titles and axes are paired purely by position; this assumes the
# group labels iterate in the same order as `titles` -- TODO confirm against
# the CSV's label values.
for (_label, frame), heading, axis in zip(groups, titles, sub.ravel()):
    line_x, line_y, summary = slr(frame['x'], frame['y'])
    axis.scatter(frame['x'], frame['y'])
    axis.plot(line_x, line_y, 'r-')
    # Identical limits on every panel so the four plots are directly comparable.
    axis.set(xlim=(2, 20), ylim=(2, 14), title=heading)
    axis.text(
        2.5, 10.5, summary,
        fontsize=6,
        bbox={'facecolor': 'none', 'alpha': 0.5},
    )
plt.show()
